
/*
This do-file builds ZCTA-level, SCI-weighted exposure to COVID-19 cases
(raw and per 100,000 people) together with SCI-weighted county covariates.
*/

* county-level population totals, built from the ZCTA-to-county relationship file
import delimited "${data_raw}/geo/zcta_county_rel_10.txt", clear
keep zcta5 geoid poppt

* recode 36047, 36081, 36085 and 36005 to 36061 so they are summed as one unit
* NOTE(review): presumably pools the NYC boroughs to match how the case data
* report New York City -- confirm against the case source
replace geoid = 36061 if inlist(geoid, 36047, 36081, 36085, 36005)

rename zcta5 fr_zcta
rename geoid fips

* one row per county code holding the sum of poppt
collapse (rawsum) poppt, by(fips)

* stash for the per-capita merge further down
tempfile pop
save `pop'

* COVID-19 case counts
use "${data_derived_cases}/usa_cases_deaths_jh_nyt_unique.dta", clear

* go from daily to weekly level: keep the within-week maximum of every
* cases and deaths series (the end-of-week value if the series are cumulative)
* NOTE(review): week() returns the week of the year only, so dates from
* different calendar years would share a week number -- confirm that the
* input covers a single year
gen daily_date = date(date , "YMD")
format daily_date %td
gen week = week(daily_date)

collapse (max) cases* deaths*, by(fips week)

* drop early weeks of 2020
drop if week <11 

* impute zeros in NYT data if missing and JH indicates that these should be zeros
foreach var in cases deaths {
	qui: replace `var'_nyt = 0 if `var'_jh ==0 & `var'_nyt ==. 
}

* reshape wide on weeks (the trailing underscore yields names like cases_jh_11)
ren (cases* deaths*) (cases*_ deaths*_)
greshape wide cases* deaths*, i(fips) j(week)

* create exposure per capita variables (i.e. per 100,000 people)
* keep(3) retains only counties found in both the case and the population data
merge 1:1 fips using `pop', nogen keep(3)

foreach var of varlist cases_jh_11-deaths_jh_26 {
	qui: gen float `var'_pc = `var' / (poppt / 100000)
}

* clean up and save: JH case series only, plus county id and population
ren fips county
keep county *jh* poppt
drop *deaths*

* focus on a few selected weeks only to make things smaller:
* drops weeks 12, 14, 16, 18 and every week in the 20s and 30s.
* The ? wildcard matches exactly one character, so the trailing * is needed
* to remove the matching _pc variables as well; without it the per-capita
* variables for weeks 20-26 would stay in the file and be merged onto the
* much larger SCI data as unused columns.
drop cases_jh_12* cases_jh_14* cases_jh_16* cases_jh_18* cases_jh_2?* cases_jh_3?*
save "${data_raw_cases}/usa_cases_deaths_cleaned_for_merge.dta", replace

* load SCI data: zip to county
use "${data_derived_sci}/zcta_to_county_SCI.dta", clear

* 'fraction of friends' (more precisely: scaled SCI) = each row's
* scaled_sci_pop divided by the total of scaled_sci_pop within its user_zcta
tempvar zcta_total
gegen float `zcta_total' = total(scaled_sci_pop), by(user_zcta)
gen float frac_scaled_sci = scaled_sci_pop / `zcta_total'
drop `zcta_total' scaled_sci_pop
keep if user_zcta != .
compress

* attach the cleaned weekly county case counts; only matched rows are kept
destring county, replace
fmerge m:1 county using "${data_raw_cases}/usa_cases_deaths_cleaned_for_merge.dta", keep(3) nogen

* store the ZCTA as a zero-padded 5-character string under the name zcta
gen zcta = string(user_zcta, "%05.0f")
drop user_zcta

compress

* shrink storage and shorten the case variable names
recast float scaled_sci, force
rename cases_jh_* c_*

* interact case counts with SCI
foreach v of varlist c_11-c_19 {
	* total cases x raw SCI
	gen float sci_`v' = `v' * scaled_sci
	* total cases x relative SCI
	gen float sci_pc_`v' = `v' * frac_scaled_sci
}

* interact per-capita case counts with relative SCI
foreach v of varlist c_11_pc-c_19_pc {
	gen float sci_pc_`v' = `v' * frac_scaled_sci
}

* merge on county level covariates (rows without a covariate match are kept)
local covs med_hh_inc_2018 pop_density_2018 frac_urban_2010
fmerge m:1 county using "${data_derived_covs}/county_covariates.dta", ///
	keepusing(`covs') keep(1 3) ///
	nogen assert(1 2 3)

compress

* interact SCI with county level covariates
foreach v of varlist `covs' {
	* covariate x raw SCI
	gen float sci_`v' = `v' * scaled_sci
	* covariate x relative SCI
	gen float sci_pc_`v' = `v' * frac_scaled_sci
}

* sum every sci_ variable within zcta and save the zip-level file
collapse (rawsum) sci_*, by(zcta)

compress

save "${data_derived_exposure}/sci_weighted_cases_short.dta", replace

* remove the intermediate case file written earlier in this script
erase "${data_raw_cases}/usa_cases_deaths_cleaned_for_merge.dta"
